tlc <- read_parquet(file = "./data/yellow_tripdata_2022-06.parquet")
tlc_zone <- st_read("./data/taxi_zones/taxi_zones.shp", quiet = TRUE)
ggplot(tlc_zone) + geom_sf() + theme_inset() + theme_bw()

tlc_zone <- st_transform(tlc_zone, crs = 4326)
ggplot(tlc_zone) + geom_sf(extent ="device")

our_neighbourhood <- tlc_zone %>%
filter(zone == "Gramercy" | zone == "Kips Bay")
ggplot(tlc_zone) + geom_sf() +
theme_inset() +
geom_sf(data = our_neighbourhood, fill = "red")

bbox <- st_bbox(tlc_zone) %>% as.numeric
nyc_map <- get_stamenmap(bbox = bbox, messaging = FALSE, zoom = 11,
maptype = "toner-lite", format = c("png"))
ggmap(nyc_map) +
geom_sf(data = our_neighbourhood, fill = "red", inherit.aes = FALSE)

pu_count <- tlc %>% group_by(PULocationID) %>% tally()
pu_count
## # A tibble: 261 x 2
## PULocationID n
## <int> <int>
## 1 1 1027
## 2 2 3
## 3 3 40
## 4 4 4267
## 5 5 58
## 6 6 7
## 7 7 2241
## 8 8 24
## 9 9 40
## 10 10 1433
## # ... with 251 more rows
pu_count <- pu_count %>% rename(LocationID = PULocationID)
joined_tbl <- left_join(tlc_zone, pu_count)
ggplot(joined_tbl) + geom_sf() + aes(fill = n)

ggplot(joined_tbl %>% filter(borough == "Manhattan")) + geom_sf() + aes(fill = n)

ggplot(joined_tbl %>% filter(borough == "Manhattan")) + geom_sf() + aes(fill = n) + scale_fill_viridis_c(option = "A")

bbox <- st_bbox(tlc_zone) %>% as.numeric
nyc_map <- get_stamenmap(bbox = bbox, messaging = FALSE, zoom = 11,
maptype = "toner-lite", format = c("png"))
ggmap(nyc_map)

ggmap(nyc_map) +
geom_sf(data = our_neighbourhood, fill = "red", inherit.aes = FALSE)

ggmap(nyc_map) +
geom_sf(data = joined_tbl, aes(fill = n), inherit.aes = FALSE) + scale_fill_viridis_c(option = "A")

tlc_zone_new <- left_join(tlc_zone, pu_count, by = "LocationID") %>% filter(borough == "Manhattan")
plot(our_neighbourhood)

random_locs <- st_sample(our_neighbourhood, type = "random", size = 10)
ggplot() + geom_sf(data = our_neighbourhood) + geom_sf(data = random_locs)

storage <- list()
map_output <- ggmap(nyc_map)
for (zone_id in 1:nrow(tlc_zone_new)){
zone <- tlc_zone_new[zone_id, ]
zone$n[is.na(zone$n)] <- 0
sampled_points <- zone %>% st_sample(type = "random", size = round(zone$n/100))
storage[[zone_id]] <- sampled_points
map_output <- map_output + geom_sf(data = storage[[zone_id]], inherit.aes = FALSE, size = 0.5, alpha = 0.1)
}
map_output
